## Dreher and Gassebner 2012

library(foreign)
library(Amelia)
library(mi)
library(betareg)

## Read the original replication data (Stata .dta file) and inspect it:
## first rows, then the number of rows and columns.
dg2012 <- read.dta(file = "DG2012 IO Rep Data.dta")
head(dg2012)
dim(dg2012)

## Remove the ID variable
dg2012[["wdi"]] <- NULL

## Remove the derived dummy variables t1, ..., t37.
## Assigning NULL deletes a column; a name that is absent is simply skipped.
for (dummy_name in paste0("t", 1:37)) {
    dg2012[[dummy_name]] <- NULL
}
rm(dummy_name)  # do not leave the loop variable behind in the workspace

## How many variables are left? 45: no reduction necessary
dim(dg2012)

## Imputation
## What is the average percentage of missing data across variables?
## Count the missing values (NA) in every column of `x`.
##
## Args:
##   x: a data frame, or a matrix/array with at least two dimensions.
##
## Returns:
##   An unnamed integer vector with one NA count per column, in column order.
NAs <- function(x) {
    count_missing <- function(column) sum(is.na(column))

    if (is.data.frame(x)) {
        # Work on the columns directly: apply() would first coerce the whole
        # data frame to a single (usually character) matrix.
        return(unname(vapply(x, count_missing, integer(1))))
    }

    # Matrices and arrays: iterate over the second dimension as before.
    as.vector(apply(x, 2, count_missing))
}
## Number of missing values in each column
NAs(dg2012)
## Per-column share of missing rows, averaged over all columns, in percent
100 * mean(NAs(dg2012) / nrow(dg2012))

## Thus: 19 imputations (the value passed as m to amelia() below)

## Fix the random seed so the imputation run is reproducible.
## (R reads the literal 02138 as the number 2138.)
set.seed(02138)

## Multiple imputation with Amelia: 19 imputed datasets, "year" as the time
## variable and "id" as the cross-section variable, a third-order time
## polynomial, lags of the listed variables, and a ridge prior (empri) set to
## 1% of the number of rows.
dg2012.out <- amelia(
    dg2012,
    m = 19,
    ts = "year",
    cs = "id",
    polytime = 3,
    empri = 0.01 * nrow(dg2012),
    lags = c(
        "crisesadj", "crises",
        "lwb", "limf", "limf_sba",
        "lida_dis0_gdp", "librd_dis0_gdp", "lsba_dis_gdp", "lprgf_dis0_gdp",
        "lwbgr", "lwbcab", "lwbcpi", "lwbwbavg", "lwbresi",
        "limfgr", "limfcab", "limfcpi", "limfimfavg", "limfresi",
        "wb_own", "wb_inh", "imf_own", "imf_inh"
    )
)

## Write the imputations in Stata format; separate = FALSE requests a single
## output file rather than one file per imputation.
write.amelia(
    obj = dg2012.out,
    file.stem = "DG2012 IO Imp Data",
    format = "dta",
    separate = FALSE
)
